pacman::p_load(dplyr,readr,stringr,readxl,stringi,tidyverse,sf)
# Remove every object from the current environment before the script runs.
# NOTE(review): this also wipes objects created by whoever sources the script;
# running the script in a fresh R session would make this line unnecessary.
rm(list=ls())
# Large penalty against scientific notation, so numbers are printed and
# converted to text in fixed notation.
options(scipen = 999)
################################################################################


################################## Brazil

# 2010 territorial division: state / mesoregion / microregion / county ids and names
df_1 <- read_excel(
  "../../data/landuse/raw/dtb_2010.xls",
  sheet = 1, skip = 1, col_names = FALSE, .name_repair = "unique_quiet"
)
names(df_1) <- c(
  "state_id", "state",
  "mesoregion_id", "mesoregion",
  "microregion_id", "microregion",
  "county_id", "county"
)
# Normalise every column in a single pass:
# character -> upper case -> trimmed -> accents transliterated to ASCII,
# then order the table by county id.
df_1 <- df_1 %>%
  mutate(across(
    everything(),
    ~ stri_trans_general(trimws(toupper(as.character(.x))), "Latin-ASCII")
  )) %>%
  arrange(county_id)

# 2010 land area
df_2 <- read_excel(
  "../../data/landuse/raw/Areas_MU_UF_RE_BR.xls",
  sheet = 1, col_names = FALSE, .name_repair = "unique_quiet"
)
# The first and the last rows of the sheet are not data rows: drop both.
df_2 <- df_2[-c(1, nrow(df_2)), ]
names(df_2) <- c(
  "id", "region_id", "region", "state_id", "state", "state_abb",
  "county_id", "county", "land_area_km2"
)
# Keep the region columns, the join key and the area (converted to numeric).
df_2 <- df_2 %>%
  dplyr::transmute(
    region_id,
    region,
    county_id,
    land_area_km2 = as.numeric(land_area_km2)
  )

# 2010 population
df_3 <- read_excel(
  "../../data/landuse/raw/tabela200.xlsx",
  sheet = 1, skip = 6, col_names = FALSE, .name_repair = "unique_quiet"
)
# The last row of the sheet is not a data row.
df_3 <- head(df_3, -1)
names(df_3) <- c("county_id", "county", "variable", "population")
# Only the join key and the (numeric) population are needed downstream.
df_3 <- df_3 %>%
  dplyr::transmute(county_id, population = as.numeric(population))

# Minimum Comparable Areas (AMC; Ehrl, 2017): crosswalk from 2010 county code to AMC
df_4 <- read_csv(
  "../../data/landuse/raw/Ehrl_AMCgeneration_EE/_Crosswalk_final_1991_2010.csv",
  show_col_types = FALSE
) %>%
  # county_id is cast to character so it can be joined with the other tables
  dplyr::transmute(
    county_id = as.character(code2010),
    amc_id = amc
  )

# Merge
# Attach land area (df_2), population (df_3) and AMC code (df_4) to the 2010
# territorial division (df_1). Left joins keep every row of df_1; a county that
# is absent from one of the other tables gets NA in the corresponding columns.
df <- df_1 %>%  left_join(df_2, by = "county_id") %>%
  left_join(df_3, by = "county_id") %>% left_join(df_4, by = "county_id")

# Prefix every identifier with the country code "BR".
# A missing identifier must stay NA: a plain paste0("BR", x) would turn it into
# the literal string "BRNA", which looks like a valid id and would silently
# pool all unmatched counties into a single unit in any later aggregation.
id_cols <- c("county_id", "amc_id", "microregion_id", "mesoregion_id",
             "state_id", "region_id")
df <- df %>% mutate(
  across(
    all_of(id_cols),
    ~ if_else(is.na(.x), NA_character_, paste0("BR", as.character(.x)))
  ),
  country = "BRAZIL")

# Final column layout of the output file.
col_order <- c("country", "county_id", "county","amc_id", 
               "microregion_id", "microregion","mesoregion_id","mesoregion",
               "state_id", "state", "region_id", "region",
               "land_area_km2", "population")
df <- df %>% dplyr::select(all_of(col_order))
write_csv(df, gzfile("../../data/landuse/clean/geographicunits_brazil.csv.gz"))


################################## Argentina 

## Geographic units
# Read the county shapefile and keep only the name/id attribute columns.
df <- st_read("../../data/shapefiles/clean/argentina_county.shp", quiet=TRUE)
colnames(df) <- c("country", "state", "state_id", "county", "county_id", "geometry")
df <- data.frame(df) %>% dplyr::select(county, county_id, state, state_id)
# Normalise all columns: character -> upper case -> trimmed -> ASCII.
df <- df %>% mutate(across(everything(), ~ str_to_upper(as.character(.)))) %>%  
  mutate(across(everything(), str_trim)) %>% mutate(across(everything(), ~ stri_trans_general(., "Latin-ASCII")))
df$state[df$state == "CIUDAD DE BUENOS AIRES"] <- "CAPITAL FEDERAL"

# Harmonise county names: shapefile spelling (name) -> target spelling (value).
#
# The table is applied as an EXACT, whole-name lookup. It must not be passed to
# str_replace_all(): that function treats each name as a regular expression and
# replaces substrings one pattern after another, so e.g. "ISLA" rewrote
# "ISLAS DEL IBICUY" to "SAN SALVADORS DEL IBICUY", "COMUNA 1" rewrote
# "COMUNA 10" to "CIUDAD AUTONOMA DE BUENOS AIRES0", and "." matched any
# character. With an exact lookup the former corrective entry
# "SAN SALVADORS DEL IBICUY" -> "ISLAS DEL IBICUY" is no longer needed, and
# "SAN FERNANDO (1)" is written literally instead of regex-escaped.
# A few entries map a name to itself; they are kept from the original table
# and have no effect.
county_fixes <- c(
  "1 DE MAYO" = "PRIMERO DE MAYO",
  "1RO. DE MAYO" = "PRIMERO DE MAYO",
  "12 DE OCTUBRE" = "DOCE DE OCTUBRE",
  "2 DE ABRIL" = "DOS DE ABRIL",
  "25 DE MAYO" = "VEINTICINCO DE MAYO",
  "9 DE JULIO" = "NUEVE DE JULIO",
  "ADOLFO GONZALES CHAVES" = "GONZALES CHAVES",
  "CORONEL BRANDSEN" = "BRANDSEN",
  "FAMALLA" = "FAMAILLA",
  "CHOS MALAL" = "CHOS MALAL",
  "DR. MANUEL BELGRANO" = "CAPITAL",
  "FLORENTINO AMEGHINO" = "AMEGHINO",
  "GENERAL ANGEL V PENALOZA" = "GENERAL ANGEL V. PENALOZA",
  "JUAN B ALBERDI" = "JUAN B. ALBERDI",
  "JUAN F IBARRA" = "JUAN F. IBARRA",
  "LEANDRO N ALEM" = "LEANDRO N. ALEM",
  "MAYOR LUIS J FONTANA" = "MAYOR LUIS J FONTANA",
  "PRESIDENTE DE LA PLAZA" = "PRESIDENCIA DE LA PLAZA",
  "GENERAL JUAN MADARIAGA" = "GENERAL MADARIAGA",
  "GUAYASAN" = "GUASAYAN",
  "LA CANDELARIA" = "CANDELARIA",
  "CORONEL DE MARINA L ROSALES" = "CORONEL ROSALES",
  "GENERAL LA MADRID" = "GENERAL LAMADRID",
  "GENERAL JUAN F QUIROGA" = "GENERAL JUAN F. QUIROGA",
  "SENGUER" = "SENGUER",
  "GENERAL SARMIENTO" = "CAPITAN SARMIENTO",
  "LA CAPITAL" = "CAPITAL",
  "JOSE CLEMENTE PAZ" = "JOSE C. PAZ",
  "SAN MIGUEL DE TUCUMAN" = "CAPITAL",
  "JUAN MARTIN DE PUYERREDON" = "CAPITAL",
  "SILPICA" = "SILIPICA",
  "LA BANDA" = "BANDA",
  "CHICALCO" = "CHICAL CO",
  "O HIGGINS" = "O'HIGGINS",
  "CORONEL DE MARINA LEONARDO ROSALES" = "CORONEL ROSALES",
  "GRAL BELGRANO" = "GENERAL BELGRANO",
  "LDOR GRAL SAN MARTIN" = "LIBERTADOR GENERAL SAN MARTIN",
  "CONCEPCION DE LA SIERRA" = "CONCEPCION",
  "GENERAL ANGEL VICENTE PENALOZA" = "GENERAL ANGEL V. PENALOZA",
  "GENERAL JUAN FACUNDO QUIROGA" = "GENERAL JUAN F. QUIROGA",
  "JUAN FELIPE IBARRA" = "JUAN F. IBARRA",
  "DISTRITO FEDERAL" = "CIUDAD AUTONOMA DE BUENOS AIRES",
  "SAN FERNANDO (1)" = "SAN FERNANDO",
  "ISLA" = "SAN SALVADOR",
  "PELEGRINI" = "PELLEGRINI",
  "EL DORADO" = "ELDORADO",
  "PILNANIYEU" = "PILCANIYEU",
  "GENERAL JUAN F.QUIROGA" = "GENERAL JUAN F. QUIROGA",
  "CORONEL DE MARINA L. ROSALES" = "CORONEL ROSALES",
  "PASO DE LOS INDIOS" = "PASO DE INDIOS",
  "COMUNA 1" = "CIUDAD AUTONOMA DE BUENOS AIRES"
)
# Indexing by name yields NA for names that are not in the table (and for NA
# names); coalesce() then falls back to the unchanged county name.
df$county <- dplyr::coalesce(unname(county_fixes[df$county]), df$county)
# Shapefile attributes (left side of the join)
df1 <- as.data.frame(df)

# Department table with land area, population and region
df2 <- read.csv("../../data/landuse/raw/departamentos.csv", header = TRUE)
names(df2) <- c("county", "state", "land_area_km2", "population", "region")
# Trim and upper-case the two join keys
df2 <- df2 %>%
  mutate(
    county = toupper(str_trim(county)),
    state = toupper(str_trim(state))
  )

# Keep only the units present in both tables
df <- df1 %>% inner_join(df2, by = c("county", "state"))

df[["country"]] <- "ARGENTINA"
# Region ids are zero-based and numbered by order of first appearance
df[["region_id"]] <- paste0(
  "AR",
  as.integer(factor(df[["region"]], levels = unique(df[["region"]]))) - 1
)
df[["land_area_km2"]] <- as.numeric(df[["land_area_km2"]])
df[["population"]] <- as.numeric(df[["population"]])

# Final column layout of the output file
col_order <- c(
  "country", "county_id", "county", "state_id", "state",
  "region_id", "region", "land_area_km2", "population"
)
df <- df[, col_order]
write_csv(df, gzfile("../../data/landuse/clean/geographicunits_argentina.csv.gz"))



